# Data handling and numerics
import pandas as pd
import numpy as np
# Static plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
import mplfinance as mpf
import matplotlib.dates as mdates
import datetime as dt
# Interactive plotting (plotly)
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
# Use the "notebook" renderer and the dark template for every plotly figure
pio.renderers.default = "notebook"
pio.templates.default = "plotly_dark"
import gc
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')
# Default matplotlib figure size as [width, height]
plt.rcParams['figure.figsize'] = [12, 8]

A brief list of topics:
Value at risk (VaR) is a measure used in finance to quantify the risk of an investment or of a portfolio. It is quoted in units of dollars for a given probability and time horizon. For example, a 1%, one-year value at risk of 10 million dollars means that there is a 1% chance that the portfolio will lose at least 10 million dollars within one year.
A stress test is a test, usually ordered by a government, to see how a firm would stand up to a financial crisis.
# Load the S&P 500 price history, indexed by its parsed "Date" column
snp = pd.read_csv("Data/GSPC.csv", index_col="Date", parse_dates=["Date"])
snp.head()
# Line chart of the closing price against the date index
px.line(data_frame=snp, x=snp.index, y="Close", title="S&P 500")
Beta gives a measure of how much a stock moves in relation to the market. A $\beta$ of 2 means that the stock moves twice as much as the market. A $\beta$ of 0.5 means that the stock moves half as much as the market.
# Load the Apple and Google price histories, each indexed by its parsed "Date" column
apple, google = (
    pd.read_csv(csv_path, parse_dates=["Date"], index_col="Date")
    for csv_path in ("Data/AAPL.csv", "Data/GOOG.csv")
)
apple.head()
def merge_two_stocks(df1: pd.DataFrame, df2: pd.DataFrame, names=("df1", "df2"), columns=None, date_too=True) -> pd.DataFrame:
    """
    Merge two stocks together on their index (assumes the index holds dates).

    Both indexes are reduced to "YYYY-MM-DD" strings before an inner join, so
    rows are matched on the calendar date only and only dates present in both
    frames are kept. The inputs are not modified.

    Parameters
    ----------
    df1 : pd.DataFrame
        First dataframe
    df2 : pd.DataFrame
        Second dataframe
    names : sequence of two str, optional
        Names of the two dataframes (stock names). They become the column
        suffixes ("_<name>") for overlapping columns, or the column names
        themselves when exactly one column is merged. By default ("df1", "df2")
    columns : str or sequence of str, optional
        Columns to keep from both dataframes before merging. A single column
        name may be given as a plain string. By default None (keep every column)
    date_too : bool, optional
        If True, the merged index is converted back to datetimes and also
        copied into a "Date" column. If False, the index stays as
        "YYYY-MM-DD" strings and no "Date" column is added. By default True

    Returns
    -------
    pd.DataFrame
        Merged dataframe
    """
    # Normalise `columns` so that a bare string or a tuple behaves like a list.
    if columns is not None:
        columns = [columns] if isinstance(columns, str) else list(columns)

    prepared = []
    for frame in (df1, df2):
        # Work on copies so the callers' frames keep their original index.
        frame = frame[columns].copy() if columns else frame.copy()
        frame.index = pd.to_datetime(frame.index).strftime("%Y-%m-%d")
        prepared.append(frame)
    left, right = prepared

    df = left.merge(
        right,
        how="inner",
        left_index=True,
        right_index=True,
        suffixes=("_" + names[0], "_" + names[1]),
    )

    if date_too:
        df.index = pd.to_datetime(df.index)
        df["Date"] = df.index

    # With a single merged column the suffixed names are redundant, so use the
    # stock names directly. `columns` may be None, which the original code
    # passed to len() and crashed on.
    if columns is not None and len(columns) == 1:
        df.columns = [names[0], names[1]] + (["Date"] if date_too else [])
    return df
# Open and Close for both stocks: columns get "_Apple"/"_Google" suffixes, no "Date" column
apple_google = merge_two_stocks(
    apple,
    google,
    names=["Apple", "Google"],
    columns=["Open", "Close"],
    date_too=False,
)
# Close only: a single merged column, so the result columns are "Apple", "S&P 500" and "Date"
apple_snp = merge_two_stocks(apple, snp, columns=["Close"], names=["Apple", "S&P 500"])
apple_snp.head()
apple_google.head()
# Plot Apple and the S&P 500 on one figure, each with its own y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
# Apple goes on the primary (left) y-axis
fig.add_trace(
    go.Scatter(x=apple_snp.Date, y=apple_snp['Apple'], name="Apple"),
    secondary_y=False,
)
# The S&P 500 goes on the secondary (right) y-axis
fig.add_trace(
    go.Scatter(x=apple_snp.Date, y=apple_snp['S&P 500'], name="S&P 500"),
    secondary_y=True,
)
# Set figure title
fig.update_layout(
    title_text="Apple vs S&P 500"
)
# Set x-axis title
fig.update_xaxes(title_text="Date")
# Set y-axes titles; each title names the trace actually drawn on that axis
fig.update_yaxes(title_text="<b>primary</b> Apple", secondary_y=False)
fig.update_yaxes(title_text="<b>secondary</b> S&P 500", secondary_y=True)
Beta could be calculated by first dividing the security's standard deviation of returns by the benchmark's standard deviation of returns. The resulting value is multiplied by the correlation of the security's returns and the benchmark's returns. Mathematically, the formula is: $$ \beta = \frac{\sigma_{s}}{\sigma_{b}} \times \rho_{s,b} $$ where $\sigma_{s}$ is the standard deviation of the security's returns, $\sigma_{b}$ is the standard deviation of the benchmark's returns, and $\rho_{s,b}$ is the correlation between the security's returns and the benchmark's returns.
# Period-over-period percentage change of each price column
for returns_col, price_col in (("Apple_returns", "Apple"), ("S&P_returns", "S&P 500")):
    apple_snp[returns_col] = apple_snp[price_col].pct_change()
# The first row has no previous price, so its returns are NaN; drop it in place
apple_snp.dropna(inplace=True)
apple_snp
# Build month-end data and compute genuine month-over-month returns.
# asfreq("M", method="ffill") keeps one row per calendar month end, filled from
# the last available earlier row, so the price columns hold month-end prices.
# The return columns copied over by asfreq are still single-period returns of
# the source frame, so they are recomputed here from the month-end prices.
# NOTE(review): newer pandas versions prefer the "ME" alias over "M" - confirm
# against the pandas version in use.
apple_snp_month = apple_snp.asfreq("M", method="ffill")
apple_snp_month["Apple_returns"] = apple_snp_month["Apple"].pct_change()
apple_snp_month["S&P_returns"] = apple_snp_month["S&P 500"].pct_change()
# The first month has no previous month to compare against
apple_snp_month.dropna(inplace=True)
apple_snp_month.head()
# Overlay both return series on one figure; Apple is drawn semi-transparent
fig = make_subplots()
return_traces = [
    go.Scatter(x=apple_snp_month.index, y=apple_snp_month[column], opacity=alpha, name=label)
    for column, alpha, label in (
        ("S&P_returns", 1.0, "S&P"),
        ("Apple_returns", 0.3, "Apple"),
    )
]
fig.add_traces(return_traces)
# Scatter Apple returns against S&P returns with a red OLS trendline
fig = px.scatter(
    apple_snp_month,
    x="S&P_returns",
    y="Apple_returns",
    trendline="ols",
    trendline_color_override="red",
)
label_font = {"family": "Courier New, monospace", "size": 18, "color": "#7f7f7f"}
fig.update_layout(
    title="Apple vs S&P 500 (Monthly Returns)",
    xaxis_title="S&P 500 Returns",
    yaxis_title="Apple Returns",
    font=label_font,
)
fig.show()
$\beta$ is nothing but the slope of the regression line of the security's returns on the benchmark's returns.
# Beta of Apple = cov(Apple returns, S&P returns) / var(S&P returns)
stock_returns = apple_snp_month["Apple_returns"]
market_returns = apple_snp_month["S&P_returns"]
covariance = stock_returns.cov(market_returns)
variance = market_returns.var()
apple_beta = covariance / variance
print(f"Apple's beta is {apple_beta}")
Let's see how this varies year by year.
def clc_beta(year):
    """
    Compute Apple's beta against the S&P 500 for one calendar year.

    Reads the module-level ``apple_snp`` frame and uses its "Date",
    "Apple_returns" and "S&P_returns" columns.

    Parameters
    ----------
    year : int
        Calendar year whose rows are selected via the "Date" column

    Returns
    -------
    float
        Covariance of the two return series divided by the variance of the
        S&P returns, over the rows of that year
    """
    in_year = apple_snp["Date"].dt.year == year
    yearly = apple_snp[in_year]
    market = yearly["S&P_returns"]
    return yearly["Apple_returns"].cov(market) / market.var()
# One beta per calendar year from 1980 through 2022
years = np.arange(1980, 2023)
betas = [clc_beta(year) for year in years]
fig = px.line(x=years, y=betas)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text=r"Beta for Apple")
Market risk is the risk that affects all stocks in the market. Idiosyncratic risk is the risk that affects only one stock.
# Seed the global RNG so the draws below are reproducible
np.random.seed(42)
# 2000 draws each: standard normal first, then standard Cauchy
normal = np.random.normal(0, 1, 2000)
cauchy = np.random.standard_cauchy(2000)
# One column per distribution, in the order Normal, Cauchy
distribution = pd.DataFrame({"Normal": normal, "Cauchy": cauchy})
distribution
# Histogram of each sample on a shared figure
fig = make_subplots()
histogram_traces = [
    go.Histogram(x=distribution[column], name=label)
    for column, label in (
        ("Normal", "Normal Distribution"),
        ("Cauchy", "Cauchy Distribution"),
    )
]
fig.add_traces(histogram_traces)
We can see that the Cauchy distribution is fat-tailed. To see it clearly, let's plot the 'return' of both distributions.
# Percentage change between consecutive draws of each sample
for returns_col, sample_col in (("Normal_Returns", "Normal"), ("Cauchy_Returns", "Cauchy")):
    distribution[returns_col] = distribution[sample_col].pct_change()
# The first row has no previous draw, so its change is NaN; drop it in place
distribution.dropna(inplace=True)
# Overlay both 'return' series, each half-transparent
fig = make_subplots()
returns_traces = [
    go.Scatter(y=distribution[column], name=label, opacity=0.5)
    for column, label in (
        ("Normal_Returns", "Normal Distribution"),
        ("Cauchy_Returns", "Cauchy Distribution"),
    )
]
fig.add_traces(returns_traces)
The large number of huge spikes shows that, in the Cauchy distribution, even values very far from the centre have a substantial probability of occurring.
# Summary statistics for the columns of `distribution`
distribution.describe()
The central limit theorem says that the sum of a large number of independent random variables with finite variance will be approximately normally distributed. (This does not work if the underlying distribution is so fat-tailed that its variance is not finite.)
Let's see if the central limit theorem holds for Cauchy distribution.
# Draw 1000 independent samples of 2000 Cauchy values and record each sample mean
means = []
for _ in range(1000):
    cauchy = np.random.standard_cauchy(2000)
    means.append(cauchy.mean())
# Plot the sample means, not the last raw sample: the CLT is a statement about
# the distribution of the means, so that is what the histogram must show.
fig = px.histogram(x=means)
fig.update_xaxes(title_text="Mean of 2000 Cauchy Samples")
fig.update_yaxes(title_text=r"Count")
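The CLT does not hold here: the Cauchy distribution has no finite mean or variance, so the average of many Cauchy samples does not become normally distributed.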
Covariance between two stocks measures how the two stocks move together. If the covariance is zero, the two stocks are uncorrelated (independent stocks always have zero covariance, but zero covariance alone does not guarantee independence). If the covariance is positive, the two stocks tend to move in the same direction. If the covariance is negative, the two stocks tend to move in opposite directions. Mathematically, covariance is defined as: $$ \operatorname{cov}(X, Y)=\operatorname{E}\left[(X-\mu_{X})(Y-\mu_{Y})\right] $$ where $\mu_X$ and $\mu_Y$ are the means of $X$ and $Y$ respectively.
For example, let's calculate the covariance between the close price of Apple and Google.
# Covariance between the Apple and Google closing prices
apple_close = apple_google["Close_Apple"]
google_close = apple_google["Close_Google"]
covariance = apple_close.cov(google_close)
print(f"The covariance between Apple and Google is {covariance}")
The formula for correlation is: $$ \rho_{X,Y} = \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} $$ where $\sigma_X$ and $\sigma_Y$ are the standard deviations of $X$ and $Y$ respectively. Let's calculate the correlation between the close price of Apple and Google.
# Correlation between the Apple and Google closing prices
apple_close = apple_google["Close_Apple"]
google_close = apple_google["Close_Google"]
correlation = apple_close.corr(google_close)
print(f"The correlation between Apple and Google is {correlation}")
What about the correlation between the returns? Let's calculate it.
# Percentage change of each closing price, then the correlation of those changes
for return_col, close_col in (("Return_apple", "Close_Apple"), ("Return_google", "Close_Google")):
    apple_google[return_col] = apple_google[close_col].pct_change()
# The first row has no previous close, so its returns are NaN; drop it in place
apple_google.dropna(inplace=True)
apple_returns = apple_google["Return_apple"]
google_returns = apple_google["Return_google"]
corr = apple_returns.corr(google_returns)
print(f"The correlation between Apple and Google is {corr}")
Great! This correlation is for all the data. Let's calculate it for the past year.
# The index holds "YYYY-MM-DD" strings (merged with date_too=False), so this
# comparison is lexicographic and keeps rows dated after 2022-01-01
after_cutoff = apple_google.index > "2022-01-01"
apple_google_last_year = apple_google[after_cutoff]
recent_apple = apple_google_last_year["Return_apple"]
recent_google = apple_google_last_year["Return_google"]
new_corr = recent_apple.corr(recent_google)
print(f"The correlation between Apple and Google is {new_corr}")
This correlation is close to 1. This means that the two stocks tend to move in the same direction.
# Correlation between the Apple and S&P 500 return columns
apple_returns = apple_snp["Apple_returns"]
snp_returns = apple_snp["S&P_returns"]
corr = apple_returns.corr(snp_returns)
print(f"The correlation between Apple and S&P is {corr}")
Assuming independence, the number of claims follows a binomial distribution. If there are $n$ policies and each has probability $p$ of a claim, the standard deviation of the fraction of policies that result in a claim is $$ \sigma=\sqrt{p(1-p)/n} $$ This means that if $n$ is large, the standard deviation is small. This is the Law of Large Numbers, and it is the idea behind risk pooling.
# 1000 draws from a binomial distribution with 100 trials and success probability 0.5
binomial = np.random.binomial(n=100, p=0.5, size=1000)
fig = px.histogram(x=binomial)
fig.update_xaxes(title_text="Value From Binomial Distribution")
fig.update_yaxes(title_text=r"Count")
fig.show()
# Summary statistics of the simulated draws
mean = binomial.mean()
std = binomial.std()
print(f"The mean of the binomial distribution is {mean}")
print(f"The standard deviation of the binomial distribution is {std}")
Moral hazard is the tendency of people to take more risk when they are insured. Selection bias is the tendency of people to buy insurance when they are more likely to have a claim.
HMO is a type of health insurance that provides health care through a network of doctors and hospitals. The HMO is paid a fixed amount per month for each member. The HMO pays the doctors and hospitals a fixed amount for each service. This way, the doctors have an incentive to keep the patients healthy.
Risk is inherent in investment.
All that should matter to an investor is the performance of the entire portfolio. The performance of the individual stocks should not matter. Only the mean and variance of the portfolio should matter.
It is a risk that affects all stocks in the market. It is also called market risk.
It's a model of the optimal portfolio. It asserts that all investors will hold the optimal portfolio. But since not everyone holds the optimal portfolio, the model is only a half-truth.
The model assumes that everyone is rational. It assumes that nobody has any risks that are inherent to them.

The basic equation of CAPM reads: $$ E(r_i) = r_f + \beta_i (E(r_m) - r_f) $$ where $r_i$ is the return of the stock, $r_f$ is the risk-free rate, $\beta_i$ is the beta of the stock, and $E(r_m)$ is the expected return of the market.
What it says is this: the expected return of a stock equals the risk-free rate plus the beta of the stock times the market risk premium, i.e. the expected return of the market minus the risk-free rate.
What is the risk-free rate? It is the return of a risk-free asset. For example, the return of a 10-year US Treasury bond.
Holding negative shares of a stock is called short selling. It is a way to bet against a stock. For example, if you think that a stock will go down, you can short sell it. If you are right, you will make money. If you are wrong, you will lose money.
This works by borrowing the stock from someone and selling it. Then you buy the stock back at a lower price and return it to the original owner. The difference between the two prices is your profit. Usually, the broker will lend you the stock at a small fee.
In the CAPM model, short selling is allowed; however, we must assume that on average it is negligible. If it were widespread, everyone would do it, and the problem would arise of who would lend the stock.


The efficient portfolio frontier expresses the standard deviation of the portfolio in terms of $r$, the expected return on the portfolio, instead of $x_1$.

If a company has a constant growth rate, the value of the company is $$ V = \frac{D_1}{r-g} $$ where $D_1$ is the dividend in the next year, $r$ is the rate of discount, and $g$ is the growth rate.
In terms of a security, say a land which has a constant growth rate, the value of the land is $$ V = \frac{D_1}{r-g} $$ where $D_1$ is the rent in the next year, $r$ is the rate of discount, and $g$ is the growth rate.
This can be calculated by summing up the infinite series: $$ V = \frac{D_1}{1+r} + \frac{D_1(1+g)}{(1+r)^2} + \frac{D_1(1+g)^2}{(1+r)^3} + \cdots $$
$r$ reflects the risk of the security: the riskier the asset, the higher the discount rate. The equation estimates the current price of the asset. If the current price is higher than the estimated price, the asset is overvalued. If the current price is lower than the estimated price, the asset is undervalued.